library(Rcpp)
#' Function that calls down to cpp and finds GCD
#'
#' @param A int
#' @param B int
#'
#' @return Greatest common denominator
#'
cGCD <- function(A,B) {
ans <- evalCpp(paste0("std::gcd(",A, ", ", B, ")"))
return(ans)
}
#' Function that calls down to cpp and finds LCM
#'
#' @param A int
#' @param B int
#'
#' @return Greatest common multiple
#'
cLCM <- function(A,B) {
ans <- evalCpp(paste0("std::lcm(",A, ", ", B, ")"))
return(ans)
}PS5
PS5
Daniel Rubio, 11/21/24
As an aid to my problem solving, I used UM GPT to probe for explanations when I was confused, and for suggestions of possible built in functions to use. I did not copy and paste the prompts into the AI, but rather used it to answer the questions I had as they arose.
Problem 1: Object Oriented Programming
(a) Defining “rational” class
Defining requested Rcpp functions:
Defining class:
rational <- setClass("rational",
slots = c(data = "numeric"))
setValidity('rational',function(object){
# object must be in form c(numerator, denominator)
if(length(object@data) != 2){
stop("Input must be vector of length two")
}
if(object@data[1]!= round(object@data[1]) | object@data[2]!= round(object@data[2] )){
stop("Numerator and denominator must be integers")
}
if(object@data[2] == 0){
stop("Denominator must be non-zero")
}
return(TRUE)
})Class "rational" [in ".GlobalEnv"]
Slots:
Name: data
Class: numeric
Methods:
setMethod("show", "rational",
function(object) {
cat(object@data[1], "/", object@data[2])
cat("\n")
return(invisible(object))
}
)
setGeneric("simplify",
function(object) {
standardGeneric("simplify")})[1] "simplify"
setMethod("simplify", "rational",
function(object) {
num <- object@data[1]
den <- object@data[2]
com_denom <- cGCD(num,den)
new_num <- num / com_denom
new_den <- den / com_denom
simplified <- rational(data = c(new_num,new_den))
return(simplified)
}
)
setGeneric("quotient",
function(object,...) {
standardGeneric("quotient")})[1] "quotient"
setMethod("quotient", "rational",
function(object, digits = 8) {
num <- object@data[1]
den <- object@data[2]
result <- num/den
if(round(result,digits) == round(result,digits+1)){
rest = ""
}
else{rest="..."}
cat(round(result, digits),rest, sep="")
cat("\n")
return(invisible(result))
}
)Operations:
##' @title `rational` addition
##'
##'
##' @param e1 A `rational`
##' @param e2 A `rational`
##' @return A `rational` that is the sum of e1 and e2
setMethod("+", signature(e1 = "rational", e2 = "rational"),
function(e1, e2) {
n1 <- e1@data[1]
d1 <- e1@data[2]
n2 <- e2@data[1]
d2 <- e2@data[2]
N = n1*d2+n2*d1
D = d1*d2
return(simplify(rational(data=c(N,D))))
}
)
##' @title `rational` subtraction
##'
##'
##' @param e1 A `rational`
##' @param e2 A `rational`
##' @return A `rational` that is the difference of e1 and e2
setMethod("-", signature(e1 = "rational", e2 = "rational"),
function(e1, e2) {
n1 <- e1@data[1]
d1 <- e1@data[2]
n2 <- e2@data[1]
d2 <- e2@data[2]
N = n1*d2-n2*d1
D = d1*d2
return(simplify(rational(data=c(N,D))))
}
)
##' @title `rational` multiplication
##'
##'
##' @param e1 A `rational`
##' @param e2 A `rational`
##' @return A `rational` that is the multiplication of e1 and e2
setMethod("*", signature(e1 = "rational", e2 = "rational"),
function(e1, e2) {
n1 <- e1@data[1]
d1 <- e1@data[2]
n2 <- e2@data[1]
d2 <- e2@data[2]
N = n1*n2
D = d1*d2
return(simplify(rational(data=c(N,D))))
}
)
##' @title `rational` division
##'
##'
##' @param e1 A `rational`
##' @param e2 A `rational`
##' @return A `rational` that is e1 divided by e2
setMethod("/", signature(e1 = "rational", e2 = "rational"),
function(e1, e2) {
n1 <- e1@data[1]
d1 <- e1@data[2]
n2 <- e2@data[1]
d2 <- e2@data[2]
N = n1*d2
D = d1*n2
return(simplify(rational(data=c(N,D))))
}
)(b) Using “rational” class
r1 = rational(data = c(24,6))
r2 = rational(data = c(7,230))
r3 = rational(data = c(0,4))r124 / 6
r30 / 4
r1 + r2927 / 230
r1 - r2913 / 230
r1 * r214 / 115
r1 / r2920 / 7
r1 + r34 / 1
r1 * r30 / 1
r2 / r3Error in h(simpleError(msg, call)): error in evaluating the argument 'object' in selecting a method for function 'simplify': Denominator must be non-zero
quotient(r1)4
quotient(r2)0.03043478...
quotient(r2, digits = 3)0.03...
quotient(r2, digits = 3.14)0.03...
quotient(r2, digits = "avocado")Error in round(result, digits): non-numeric argument to mathematical function
q2 <- quotient(r2, digits = 3)0.03...
q2[1] 0.03043478
quotient(r3)0
simplify(r1)4 / 1
simplify(r2)7 / 230
simplify(r3)0 / 1
(c) Not creating impossible rationals, other bad inputs
r4 = rational(data = c(1,0))Error in validityMethod(object): Denominator must be non-zero
r5 = rational(data = c(1,2,3))Error in validityMethod(object): Input must be vector of length two
r6 = rational(data = c(1))Error in validityMethod(object): Input must be vector of length two
r7 = rational(data = c("numerator","denominator"))Error in validObject(.Object): invalid class "rational" object: invalid object for slot "data" in class "rational": got class "character", should be or extend class "numeric"
r8 = rational(data = c(1.2,4))Error in validityMethod(object): Numerator and denominator must be integers
r8 = rational(data = c(1,4.3))Error in validityMethod(object): Numerator and denominator must be integers
r9 = rational(data = 2/9)Error in validityMethod(object): Input must be vector of length two
rm(list=ls())Problem 2: plotly
(a)
Load in data
data <- read.csv("df_for_ml_improved_new_market.csv")
# install.packages("tidyverse") ran once
# install.packages("plotly") ran once
library(tidyverse)── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.1.4 ✔ readr 2.1.5
✔ forcats 1.0.0 ✔ stringr 1.5.1
✔ ggplot2 3.5.1 ✔ tibble 3.2.1
✔ lubridate 1.9.3 ✔ tidyr 1.3.1
✔ purrr 1.0.2
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(plotly)
Attaching package: 'plotly'
The following object is masked from 'package:ggplot2':
last_plot
The following object is masked from 'package:stats':
filter
The following object is masked from 'package:graphics':
layout
Regenerate plot
genres = c("Photography","Print","Sculpture","Painting","Others")
data_long <- data %>%
pivot_longer(cols=starts_with("Genre___"), names_to = "Genre",values_to = "value") %>%
filter(value==1) %>%
select(-value) %>%
mutate(
Genre = fct_recode(
Genre,
"Others" = "Genre___Others",
"Photography" = "Genre___Photography",
"Print" = "Genre___Print",
"Sculpture" = "Genre___Sculpture",
"Painting" = "Genre___Painting"
)
)
g <- ggplot(data_long, aes(x=year,group=year, y=price_usd ) ) +
geom_boxplot() +
ggtitle("Artwork price Statistics by Genre 1997-2012")+
scale_y_log10(name = "Price (USD)") +
scale_x_continuous(breaks=seq(1997,2012, by=4), name="Year")+
facet_wrap("Genre")
g(b)
Creating Interactive plot with plotly
p <- plot_ly(data=data_long) |>
add_boxplot(x = ~ year, y = ~ price_usd) |>
add_boxplot(data=data_long[data_long$Genre=="Photography",],x = ~ year, y = ~ price_usd,visible=FALSE) |>
add_boxplot(data=data_long[data_long$Genre=="Print",],x = ~ year, y = ~ price_usd,visible=FALSE) |>
add_boxplot(data=data_long[data_long$Genre=="Sculpture",],x = ~ year, y = ~ price_usd,visible=FALSE) |>
add_boxplot(data=data_long[data_long$Genre=="Painting",],x = ~ year, y = ~ price_usd,visible=FALSE) |>
add_boxplot(data=data_long[data_long$Genre=="Others",],x = ~ year, y = ~ price_usd,visible=FALSE) |>
layout(updatemenus = list(
list(y = 1,
buttons = list(
list(method = "update",
args = list(list(visible = list(TRUE, FALSE, FALSE, FALSE, FALSE, FALSE)),
list(yaxis = list(title = "Price (USD)",type="log"))),
label = "Over Time, Overall"),
list(method = "update",
args = list(list(visible = list(FALSE, TRUE, FALSE, FALSE, FALSE, FALSE)),
list(yaxis = list(title = "Price (USD)",type="log"))),
label = "Photography"),
list(method = "update",
args = list(list(visible = list(FALSE, FALSE, TRUE, FALSE, FALSE, FALSE)),
list(yaxis = list(title = "Price (USD)",type="log"))),
label = "Print"),
list(method = "update",
args = list(list(visible = list(FALSE, FALSE, FALSE, TRUE, FALSE, FALSE)),
list(yaxis = list(title = "Price (USD)",type="log"))),
label = "Sculpture"),
list(method = "update",
args = list(list(visible = list(FALSE, FALSE, FALSE, FALSE, TRUE, FALSE)),
list(yaxis = list(title = "Price (USD)",type="log"))),
label = "Painting"),
list(method = "update",
args = list(list(visible = list(FALSE, FALSE, FALSE, FALSE, FALSE, TRUE)),
list(yaxis = list(title = "Price (USD)",type="log"))),
label = "Others"))
)
)) |>
layout(yaxis = list(title = "Price (USD)",type = "log"),
xaxis = list(title = "Year"),
title = "Artwork price Statistics by Genre 1997-2012")
prm(list=ls())Problem 3: PS4-1 data.table
Setting up problem with data.table
#install.packages("nycflights13") # Ran once
# install.packages("tidyverse") # Ran once
library(nycflights13)
library(data.table)
Attaching package: 'data.table'
The following objects are masked from 'package:lubridate':
hour, isoweek, mday, minute, month, quarter, second, wday, week,
yday, year
The following objects are masked from 'package:dplyr':
between, first, last
The following object is masked from 'package:purrr':
transpose
flights <- as.data.table(flights)
airports <- as.data.table(airports)
planes <- as.data.table(planes)(a)
flights[ ,
.(mean_delay = mean(dep_delay, na.rm = TRUE),
median_delay = median(dep_delay,na.rm = TRUE))
,by=origin] |> # First dt call calculates mean and median delay by origin
merge(x=_, airports,
by.x = "origin",
by.y = "faa")|> # merge with airports table to get name joining on origin/faa code
_[ , .(name, median_delay, mean_delay)] # print out name and stats name median_delay mean_delay
<char> <num> <num>
1: Newark Liberty Intl -1 15.10795
2: John F Kennedy Intl -1 12.11216
3: La Guardia -3 10.34688
flights[, .N , by = dest] |> # gets how many rows of each dest there are
_[N>=10] |> # filters all the rows with less than 10 flights
merge(x = _, y = flights)|> # merges these rows applying the filter to the main data
_[ , .(mean_delay = mean(arr_delay, na.rm = TRUE),
median_delay = median(arr_delay,na.rm = TRUE))
, by = dest] |> # call calculates mean and median arrival by dest on filtered dests
merge(x=_, airports,
by.x = "dest",
by.y = "faa")|># merge with airports table to get name joining on dest/faa code
_[ order(-mean_delay), .(name, median_delay, mean_delay)] # print out name and stats name median_delay mean_delay
<char> <num> <num>
1: Columbia Metropolitan 28.0 41.76415094
2: Tulsa Intl 14.0 33.65986395
3: Will Rogers World 16.0 30.61904762
4: Jackson Hole Airport 15.0 28.09523810
5: Mc Ghee Tyson 2.0 24.06920415
6: Dane Co Rgnl Truax Fld 1.0 20.19604317
7: Richmond Intl 1.0 20.11125320
8: Akron Canton Regional Airport 3.0 19.69833729
9: Des Moines Intl 0.0 19.00573614
10: Gerald R Ford Intl 1.0 18.18956044
11: Birmingham Intl -2.0 16.87732342
12: Theodore Francis Green State 1.0 16.23463687
13: Greenville-Spartanburg International -0.5 15.93544304
14: Cincinnati Northern Kentucky Intl -3.0 15.36456376
15: Savannah Hilton Head Intl -1.0 15.12950601
16: Manchester Regional Airport -3.0 14.78755365
17: Eppley Afld -2.0 14.69889841
18: Yeager -1.5 14.67164179
19: Kansas City Intl 0.0 14.51405836
20: Albany Intl -4.0 14.39712919
21: General Mitchell Intl 0.0 14.16722038
22: Piedmont Triad -2.0 14.11260054
23: Washington Dulles Intl -3.0 13.86420212
24: Cherry Capital Airport -10.0 12.96842105
25: James M Cox Dayton Intl -3.0 12.68048606
26: Louisville International Airport -2.0 12.66938406
27: Chicago Midway Intl -1.0 12.36422360
28: Sacramento Intl 4.0 12.10992908
29: Jacksonville Intl -2.0 11.84483416
30: Nashville Intl -2.0 11.81245891
31: Portland Intl Jetport -4.0 11.66040210
32: Greater Rochester Intl -5.0 11.56064461
33: Hartsfield Jackson Atlanta Intl -1.0 11.30011285
34: Lambert St Louis Intl -3.0 11.07846451
35: Norfolk Intl -4.0 10.94909344
36: Baltimore Washington Intl -5.0 10.72673385
37: Memphis Intl -2.5 10.64531435
38: Port Columbus Intl -3.0 10.60132291
39: Charleston Afb Intl -4.0 10.59296847
40: Philadelphia Intl -3.0 10.12719014
41: Raleigh Durham Intl -3.0 10.05238095
42: Indianapolis Intl -3.0 9.94043412
43: Charlottesville-Albemarle -5.0 9.50000000
44: Cleveland Hopkins Intl -5.0 9.18161129
45: Ronald Reagan Washington Natl -2.0 9.06695204
46: Burlington Intl -4.0 8.95099602
47: Buffalo Niagara Intl -5.0 8.94595186
48: Syracuse Hancock Intl -5.0 8.90392501
49: Denver Intl -2.0 8.60650021
50: Palm Beach Intl -3.0 8.56297210
51: Bob Hope -3.0 8.17567568
52: Fort Lauderdale Hollywood Intl -3.0 8.08212154
53: Bangor Intl -9.0 8.02793296
54: Asheville Regional Airport -1.0 8.00383142
55: Pittsburgh Intl -5.0 7.68099053
56: Gallatin Field -2.0 7.60000000
57: NW Arkansas Regional -2.0 7.46572581
58: Tampa Intl -4.0 7.40852503
59: Charlotte Douglas Intl -3.0 7.36031885
60: Minneapolis St Paul Intl -5.0 7.27016886
61: William P Hobby -4.0 7.17618819
62: Bradley Intl -10.0 7.04854369
63: San Antonio Intl -9.0 6.94537178
64: South Bend Rgnl -3.5 6.50000000
65: Louis Armstrong New Orleans Intl -6.0 6.49017497
66: Key West Intl 7.0 6.35294118
67: Eagle Co Rgnl -4.0 6.30434783
68: Austin Bergstrom Intl -5.0 6.01990875
69: Chicago Ohare Intl -8.0 5.87661475
70: Orlando Intl -5.0 5.45464309
71: Detroit Metro Wayne Co -7.0 5.42996346
72: Portland Intl -5.0 5.14157973
73: Nantucket Mem -3.0 4.85227273
74: Wilmington Intl -7.0 4.63551402
75: Myrtle Beach Intl -13.0 4.60344828
76: Albuquerque International Sunport -5.5 4.38188976
77: George Bush Intercontinental -5.0 4.24079040
78: Norman Y Mineta San Jose Intl -7.0 3.44817073
79: Southwest Florida Intl -5.0 3.23814963
80: San Diego Intl -5.0 3.13916574
81: Sarasota Bradenton Intl -5.0 3.08243131
82: Metropolitan Oakland Intl -9.0 3.07766990
83: General Edward Lawrence Logan Intl -9.0 2.91439222
84: San Francisco Intl -8.0 2.67289152
85: Yampa Valley 2.0 2.14285714
86: Phoenix Sky Harbor Intl -6.0 2.09704733
87: Montrose Regional Airport -10.5 1.78571429
88: Los Angeles Intl -7.0 0.54711094
89: Dallas Fort Worth Intl -9.0 0.32212685
90: Miami Intl -9.0 0.29905978
91: Mc Carran Intl -8.0 0.25772849
92: Salt Lake City Intl -8.0 0.17625459
93: Long Beach -10.0 -0.06202723
94: Martha\\\\'s Vineyard -11.0 -0.28571429
95: Seattle Tacoma Intl -11.0 -1.09909910
96: Honolulu Intl -7.0 -1.36519258
97: John Wayne Arpt Orange Co -11.0 -7.86822660
98: Palm Springs Intl -13.5 -12.72222222
name median_delay mean_delay
(b)
merge(flights, planes, by = "tailnum") |> # merge tables to get model name
_[ , .(avg_speed_mph = mean(distance/(air_time/60), na.rm=TRUE), .N),by = model]|> # calculates averae speed in mph for each model alongside number of flights flown
_[avg_speed_mph == max(avg_speed_mph)] # returns only the fastest model model avg_speed_mph N
<char> <num> <int>
1: 777-222 482.6254 4
rm(list=ls())